Publication list (tag: CEDL, communication-efficient distributed learning), generated by bibbase.org from https://bibbase.org/network/files/e2kjGxYgtBo8SWSbC.

2023

Journal articles (3)

Navjot Singh; Deepesh Data; Jemin George; and Suhas Diggavi. SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Optimization. IEEE Transactions on Automatic Control, 68(2): 721–736, 2023.

@article{9691792,
  author    = {Singh, Navjot and Data, Deepesh and George, Jemin and Diggavi, Suhas},
  title     = {SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Optimization},
  journal   = {IEEE Transactions on Automatic Control},
  volume    = {68},
  number    = {2},
  pages     = {721--736},
  year      = {2023},
  url       = {https://doi.org/10.1109/TAC.2022.3145576},
  doi       = {10.1109/TAC.2022.3145576},
  issn      = {1558-2523},
  type      = {2},
  tags      = {journal,DML,CEDL},
  url_arxiv = {https://arxiv.org/abs/1910.14280},
}

Abstract: In this paper, we propose and analyze SPARQ-SGD, a communication efficient algorithm for decentralized training of large-scale machine learning models over a graph with n nodes, where communication efficiency is achieved using compressed exchange of local model parameters among neighboring nodes, which is triggered only when an event (a locally computable condition) is satisfied. Specifically, in SPARQ-SGD, each node takes a fixed number of local gradient steps and then checks if the model parameters have significantly changed compared to its last update; only when the change is beyond a certain threshold (specified by a design criterion), it compresses its local model parameters using both quantization and sparsification and communicates them to its neighbors. We prove that SPARQ-SGD converges as O(1/(nT)) and O(1/√(nT)) in the strongly-convex and non-convex settings, respectively, matching the convergence rates of plain decentralized SGD. This demonstrates that we get communication efficiency achieved by aggressive compression, local iterations, and event-triggered communication essentially for free.
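
The update pattern this abstract describes (a few local gradient steps, a locally computable trigger, then a sparsified and quantized exchange) can be pictured with a small sketch. The NumPy code below is illustrative only, not the authors' implementation; the compressor, the trigger rule, the mixing step, and every parameter name are simplifying assumptions.

# Simplified, illustrative sketch (NOT the authors' code) of an event-triggered,
# compressed decentralized update of the kind described in the abstract above.
import numpy as np

def top_k(v, k):
    # Keep the k largest-magnitude coordinates, zero the rest (sparsification).
    out = np.zeros_like(v)
    idx = np.argpartition(np.abs(v), -k)[-k:]
    out[idx] = v[idx]
    return out

def quantize(v, levels=16):
    # Crude uniform scalar quantizer, standing in for the paper's quantizer.
    scale = np.max(np.abs(v)) + 1e-12
    return np.round(v / scale * levels) / levels * scale

def sparq_like_round(x, last_sent, neighbor_estimates, grad_fn,
                     lr=0.1, local_steps=5, threshold=1e-3, k=10, gamma=0.5):
    # 1) A fixed number of local stochastic gradient steps.
    for _ in range(local_steps):
        x = x - lr * grad_fn(x)

    # 2) Event trigger: communicate only if the model moved enough since the
    #    last transmission.
    msg = None
    if np.linalg.norm(x - last_sent) ** 2 > threshold:
        # 3) Compress the change (sparsify + quantize) before sending it.
        msg = quantize(top_k(x - last_sent, k))
        last_sent = last_sent + msg

    # 4) Consensus-style mixing with locally maintained estimates of the
    #    neighbors' models (a simplification of the paper's mixing step).
    if neighbor_estimates:
        x = (1 - gamma) * x + gamma * np.mean(neighbor_estimates, axis=0)
    return x, last_sent, msg

In the paper, the trigger threshold and the amount of compression are design choices that trade communication against convergence; the sketch simply fixes arbitrary values for readability.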

Xuanyu Cao; Tamer Başar; Suhas Diggavi; Yonina C. Eldar; Khaled B. Letaief; H. Vincent Poor; and Junshan Zhang. Guest Editorial Communication-Efficient Distributed Learning Over Networks. IEEE Journal on Selected Areas in Communications, 41(4): 845–850, April 2023.

@article{10075678,
  author  = {Cao, Xuanyu and Başar, Tamer and Diggavi, Suhas and Eldar, Yonina C. and Letaief, Khaled B. and Poor, H. Vincent and Zhang, Junshan},
  journal = {IEEE Journal on Selected Areas in Communications},
  title   = {Guest Editorial Communication-Efficient Distributed Learning Over Networks},
  year    = {2023},
  volume  = {41},
  number  = {4},
  pages   = {845--850},
  month   = {April},
  doi     = {10.1109/JSAC.2023.3241848},
  issn    = {1558-0008},
  type    = {2},
  tags    = {journal,CEDL,DML},
}

Abstract: Distributed machine learning is envisioned as the bedrock of future intelligent networks, where agents exchange information with each other to train models collaboratively without uploading data to a central processor. Despite its broad applicability, a downside of distributed learning is the need for iterative information exchange between agents, which may lead to high communication overhead unaffordable in many practical systems with limited communication resources. To resolve this communication bottleneck, we need to devise communication-efficient distributed learning algorithms and protocols that can reduce the communication cost and simultaneously achieve satisfactory learning/optimization performance. Accomplishing this goal necessitates synergistic techniques from a diverse set of fields, including optimization, machine learning, wireless communications, game theory, and network/graph theory. This Special Issue is dedicated to communication-efficient distributed learning from multiple perspectives, including fundamental theories, algorithm design and analysis, and practical considerations.

Xuanyu Cao; Tamer Basar; Suhas N. Diggavi; Yonina C. Eldar; Khaled B. Letaief; H. Vincent Poor; and Junshan Zhang. Communication-Efficient Distributed Learning: An Overview. IEEE Journal on Selected Areas in Communications, 41(4): 851–873, 2023.

@article{DBLP:journals/jsac/CaoBDELPZ23a,
  author    = {Xuanyu Cao and Tamer Basar and Suhas N. Diggavi and Yonina C. Eldar and Khaled B. Letaief and H. Vincent Poor and Junshan Zhang},
  title     = {Communication-Efficient Distributed Learning: An Overview},
  journal   = {{IEEE} J. Sel. Areas Commun.},
  volume    = {41},
  number    = {4},
  pages     = {851--873},
  year      = {2023},
  url       = {https://doi.org/10.1109/JSAC.2023.3242710},
  doi       = {10.1109/JSAC.2023.3242710},
  timestamp = {Tue, 28 Mar 2023 19:50:24 +0200},
  biburl    = {https://dblp.org/rec/journals/jsac/CaoBDELPZ23a.bib},
  type      = {2},
  tags      = {journal,CEDL,DML},
}

2022

Journal articles (1)

X. Cao; T. Basar; S. N. Diggavi; Y. C. Eldar; K. B. Letaief; H. V. Poor; and J. Zhang. Communication-Efficient Distributed Learning: An Overview. To appear in IEEE Journal on Selected Areas in Communications (JSAC), 2022.

@article{comm-efficientDL,
  author  = {Cao, X. and Basar, T. and Diggavi, S. N. and Eldar, Y. C. and Letaief, K. B. and Poor, H. V. and Zhang, J.},
  journal = {to appear in IEEE Journal on Selected Areas in Communications (JSAC)},
  title   = {Communication-Efficient Distributed Learning: An Overview},
  year    = {2022},
  tags    = {journal,CEDL,DML},
  type    = {2},
}

Conference papers (1)

Osama A. Hanna; Xinlin Li; Christina Fragouli; and Suhas Diggavi. Can we break the dependency in distributed detection? In IEEE International Symposium on Information Theory (ISIT), pages 2720–2725, June 2022.

@inproceedings{9834790,
  author    = {Hanna, Osama A. and Li, Xinlin and Fragouli, Christina and Diggavi, Suhas},
  booktitle = {IEEE International Symposium on Information Theory (ISIT)},
  title     = {Can we break the dependency in distributed detection?},
  year      = {2022},
  pages     = {2720--2725},
  month     = {June},
  doi       = {10.1109/ISIT50566.2022.9834790},
  issn      = {2157-8117},
  tags      = {conf,CEDL,DML},
  type      = {4},
}

Abstract: We consider a distributed detection problem where sensors observe dependent observations. We ask, if we can allow the sensors to locally exchange a few bits with each other, whether we can use these bits to "break" the dependency of the sensor observations, and thus reduce the dependent detection problem to the much better-studied and understood case of conditionally independent observations. To this end, we propose an optimization problem that we prove is equivalent to minimizing the dependency between the sensor observations. This problem is in general NP-hard, however, we show that for at least some cases of Gaussian distributions it can be solved efficiently. For general distributions, we propose to use alternating minimization and derive a constant factor approximation algorithm. Numerical evaluations indicate that our approach can offer significant improvement in detection accuracy over alternative schemes.

2021

Preprints (2)

Navjot Singh; Xuanyu Cao; Suhas Diggavi; and Tamer Basar. Decentralized Multi-Task Stochastic Optimization With Compressed Communications. arXiv preprint arXiv:2112.12373, 2021.

@article{singh2021decentralized,
  title     = {Decentralized Multi-Task Stochastic Optimization With Compressed Communications},
  author    = {Singh, Navjot and Cao, Xuanyu and Diggavi, Suhas and Basar, Tamer},
  journal   = {arXiv preprint arXiv:2112.12373},
  year      = {2021},
  type      = {1},
  tags      = {journalSub,CEDL,DML},
  url_arxiv = {https://arxiv.org/abs/2112.12373},
}

Abstract: We consider a multi-agent network where each node has a stochastic (local) cost function that depends on the decision variable of that node and a random variable, and further the decision variables of neighboring nodes are pairwise constrained. There is an aggregate objective function for the network, composed additively of the expected values of the local cost functions at the nodes, and the overall goal of the network is to obtain the minimizing solution to this aggregate objective function subject to all the pairwise constraints. This is to be achieved at the node level using decentralized information and local computation, with exchanges of only compressed information allowed by neighboring nodes. The paper develops algorithms and obtains performance bounds for two different models of local information availability at the nodes: (i) sample feedback, where each node has direct access to samples of the local random variable to evaluate its local cost, and (ii) bandit feedback, where samples of the random variables are not available, but only the values of the local cost functions at two random points close to the decision are available to each node. For both models, with compressed communication between neighbors, we have developed decentralized saddle-point algorithms that deliver performances no different (in order sense) from those without communication compression; specifically, we show that deviation from the global minimum value and violations of the constraints are upper-bounded by O(T^(-1/2)) and O(T^(-1/4)), respectively, where T is the number of iterations. Numerical examples provided in the paper corroborate these bounds and demonstrate the communication efficiency of the proposed method.

Kaan Ozkara; Navjot Singh; Deepesh Data; and Suhas Diggavi. QuPeL: Quantized Personalization with Applications to Federated Learning. arXiv preprint arXiv:2102.11786, 2021.

@article{ozkara2021qupel,
  title     = {QuPeL: Quantized Personalization with Applications to Federated Learning},
  author    = {Ozkara, Kaan and Singh, Navjot and Data, Deepesh and Diggavi, Suhas},
  journal   = {arXiv preprint arXiv:2102.11786},
  year      = {2021},
  tags      = {journalSub,CEDL,DML},
  url_arxiv = {https://arxiv.org/abs/2102.11786},
  type      = {1},
}

Abstract: Traditionally, federated learning (FL) aims to train a single global model while collaboratively using multiple clients and a server. Two natural challenges that FL algorithms face are heterogeneity in data across clients and collaboration of clients with diverse resources. In this work, we introduce a quantized and personalized FL algorithm QuPeL that facilitates collective training with heterogeneous clients while respecting resource diversity. For personalization, we allow clients to learn compressed personalized models with different quantization parameters depending on their resources. Towards this, first we propose an algorithm for learning quantized models through a relaxed optimization problem, where quantization values are also optimized over. When each client participating in the (federated) learning process has different requirements of the quantized model (both in value and precision), we formulate a quantized personalization framework by introducing a penalty term for local client objectives against a globally trained model to encourage collaboration. We develop an alternating proximal gradient update for solving this quantized personalization problem, and we analyze its convergence properties. Numerically, we show that optimizing over the quantization levels increases the performance and we validate that QuPeL outperforms both FedAvg and local training of clients in a heterogeneous setting.
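
The alternating structure mentioned in the abstract (a proximal-style step on the personal weights, penalized toward both their own quantized version and the global model, alternated with refitting the client's quantization levels) can be sketched as follows. This is a hedged illustration under assumed names (grad_fn, lam, centers) and an assumed Lloyd-style refit of the levels, not the paper's exact updates.

# Illustrative sketch only; grad_fn is a placeholder stochastic gradient oracle
# for the client's local loss, and lam weights the two penalty terms.
import numpy as np

def nearest_center(w, centers):
    # Quantize each weight to its nearest level.
    return centers[np.argmin(np.abs(w[:, None] - centers[None, :]), axis=1)]

def qupel_like_round(w, centers, w_global, grad_fn, lr=0.05, lam=0.1, steps=10):
    for _ in range(steps):
        # (a) Gradient step on the personal weights: local loss, plus a pull
        #     toward their quantized version and toward the global model.
        w_q = nearest_center(w, centers)
        g = grad_fn(w) + lam * (w - w_q) + lam * (w - w_global)
        w = w - lr * g
        # (b) Refit the client's own quantization levels to the current weights
        #     (one Lloyd / k-means style update per level).
        assign = np.argmin(np.abs(w[:, None] - centers[None, :]), axis=1)
        for j in range(len(centers)):
            if np.any(assign == j):
                centers[j] = w[assign == j].mean()
    # Return the quantized personalized model and the learned levels.
    return nearest_center(w, centers), centers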

Journal articles (2)

Navjot Singh; Deepesh Data; Jemin George; and Suhas Diggavi. SQuARM-SGD: Communication-Efficient Momentum SGD for Decentralized Optimization. IEEE Journal on Selected Areas in Information Theory, 2(3): 954–969, September 2021.

@article{9513259,
  author    = {Singh, Navjot and Data, Deepesh and George, Jemin and Diggavi, Suhas},
  journal   = {IEEE Journal on Selected Areas in Information Theory},
  title     = {SQuARM-SGD: Communication-Efficient Momentum SGD for Decentralized Optimization},
  year      = {2021},
  volume    = {2},
  number    = {3},
  pages     = {954--969},
  month     = {Sep.},
  doi       = {10.1109/JSAIT.2021.3103920},
  issn      = {2641-8770},
  tags      = {journal,CEDL,DML},
  type      = {2},
  url_arxiv = {https://arxiv.org/abs/2005.07041},
}

Abstract: In this paper, we propose and analyze SQuARM-SGD, a communication-efficient algorithm for decentralized training of large-scale machine learning models over a network. In SQuARM-SGD, each node performs a fixed number of local SGD steps using Nesterov's momentum and then sends sparsified and quantized updates to its neighbors regulated by a locally computable triggering criterion. We provide convergence guarantees of our algorithm for general (non-convex) and convex smooth objectives, which, to the best of our knowledge, is the first theoretical analysis for compressed decentralized SGD with momentum updates. We show that the convergence rate of SQuARM-SGD matches that of vanilla SGD. We empirically show that including momentum updates in SQuARM-SGD can lead to better test performance than the current state-of-the-art which does not consider momentum updates.
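
The local phase described here (a fixed number of Nesterov-momentum SGD steps before any triggered, compressed exchange) might look roughly like the sketch below; the momentum form and hyperparameter names are assumptions, and the communication step itself is omitted (see the SPARQ-SGD sketch earlier in this list).

# Rough sketch of the local momentum phase only (not the authors' code).
import numpy as np

def local_nesterov_phase(w, v, grad_fn, lr=0.1, beta=0.9, local_steps=5):
    for _ in range(local_steps):
        g = grad_fn(w + beta * v)   # look-ahead (Nesterov) gradient
        v = beta * v - lr * g
        w = w + v
    return w, v                     # then: trigger test, compress, send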

Osama A. Hanna; Yahya H. Ezzeldin; Christina Fragouli; and Suhas Diggavi. Quantization of Distributed Data for Learning. IEEE Journal on Selected Areas in Information Theory, 2(3): 987–1001, September 2021.

@article{9521982,
  author    = {Hanna, Osama A. and Ezzeldin, Yahya H. and Fragouli, Christina and Diggavi, Suhas},
  journal   = {IEEE Journal on Selected Areas in Information Theory},
  title     = {Quantization of Distributed Data for Learning},
  year      = {2021},
  volume    = {2},
  number    = {3},
  pages     = {987--1001},
  month     = {Sep.},
  doi       = {10.1109/JSAIT.2021.3105359},
  issn      = {2641-8770},
  type      = {2},
  tags      = {journal,DML,CEDL},
  url_arxiv = {https://arxiv.org/abs/2012.07913},
}

Abstract: We consider machine learning applications that train a model by leveraging data distributed over a trusted network, where communication constraints can create a performance bottleneck. A number of recent approaches propose to overcome this bottleneck through compression of gradient updates. However, as models become larger, so does the size of the gradient updates. In this paper, we propose an alternate approach to learn from distributed data that quantizes data instead of gradients, and can support learning over applications where the size of gradient updates is prohibitive. Our approach leverages the dependency of the computed gradient on data samples, which lie in a much smaller space in order to perform the quantization in the smaller dimension data space. At the cost of an extra gradient computation, the gradient estimate can be refined by conveying the difference between the gradient at the quantized data point and the original gradient using a small number of bits. Lastly, in order to save communication, our approach adds a layer that decides whether to transmit a quantized data sample or not based on its importance for learning. We analyze the convergence of the proposed approach for smooth convex and non-convex objective functions and show that we can achieve order optimal convergence rates with communication that mostly depends on the data rather than the model (gradient) dimension. We use our proposed algorithm to train ResNet models on the CIFAR-10 and ImageNet datasets, and show that we can achieve an order of magnitude savings over gradient compression methods. These communication savings come at the cost of increasing computation at the learning agent, and thus our approach is beneficial in scenarios where communication load is the main problem.
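
The idea of quantizing data rather than gradients, with an optional coarsely quantized gradient correction, can be illustrated with the following sketch. All function and parameter names (grad_fn, data_bits, corr_bits) are placeholders introduced here, not the paper's notation, and the importance-based transmit decision is only hinted at in a comment.

# Illustrative sketch, not the paper's algorithm specification.
import numpy as np

def quantize(v, bits):
    # Uniform scalar quantizer; a stand-in for the paper's quantizer.
    levels = 2 ** bits - 1
    scale = np.max(np.abs(v)) + 1e-12
    return np.round(v / scale * levels) / levels * scale

def worker_message(w, x, y, grad_fn, data_bits=4, corr_bits=2, send_corr=True):
    # Quantize the data sample (data dimension, typically much smaller than the model).
    x_q = quantize(x, data_bits)
    corr = None
    if send_corr:
        # One extra gradient computation buys a refined estimate: send a coarsely
        # quantized difference between the true and quantized-data gradients.
        corr = quantize(grad_fn(w, x, y) - grad_fn(w, x_q, y), corr_bits)
    # (The paper also adds a layer deciding whether a sample is important enough
    #  to transmit at all; omitted in this sketch.)
    return x_q, y, corr

def server_gradient(w, msg, grad_fn):
    x_q, y, corr = msg
    g = grad_fn(w, x_q, y)          # recompute the gradient from quantized data
    return g if corr is None else g + corr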

Conference papers (2)

Navjot Singh; Deepesh Data; Jemin George; and Suhas Diggavi. SQuARM-SGD: Communication-Efficient Momentum SGD for Decentralized Optimization. In 2021 IEEE International Symposium on Information Theory (ISIT), pages 1212–1217, July 2021.

@inproceedings{9517986,
  author    = {Singh, Navjot and Data, Deepesh and George, Jemin and Diggavi, Suhas},
  booktitle = {2021 IEEE International Symposium on Information Theory (ISIT)},
  title     = {SQuARM-SGD: Communication-Efficient Momentum SGD for Decentralized Optimization},
  year      = {2021},
  pages     = {1212--1217},
  month     = {July},
  doi       = {10.1109/ISIT45174.2021.9517986},
  tags      = {conf,CEDL,DML},
  type      = {4},
  url_arxiv = {https://arxiv.org/abs/2005.07041},
}

Abstract: In this paper, we propose and analyze SQuARM-SGD, a communication-efficient algorithm for decentralized training of large-scale machine learning models over a network. In SQuARM-SGD, each node performs a fixed number of local SGD steps using Nesterov's momentum and then sends sparsified and quantized updates to its neighbors regulated by a locally computable triggering criterion. We provide convergence guarantees of our algorithm for general smooth objectives, which, to the best of our knowledge, is the first theoretical analysis for compressed decentralized SGD with momentum updates. We show that SQuARM-SGD converges at a rate matching that of vanilla distributed SGD. We empirically show that SQuARM-SGD saves significantly in total communicated bits over state-of-the-art without sacrificing much on accuracy.

Kaan Ozkara; Navjot Singh; Deepesh Data; and Suhas Diggavi. QuPeD: Quantized Personalization via Distillation with Applications to Federated Learning. Advances in Neural Information Processing Systems, 34, 2021.

@article{ozkara2021quped,
  title     = {QuPeD: Quantized Personalization via Distillation with Applications to Federated Learning},
  author    = {Ozkara, Kaan and Singh, Navjot and Data, Deepesh and Diggavi, Suhas},
  journal   = {Advances in Neural Information Processing Systems},
  volume    = {34},
  year      = {2021},
  tags      = {conf,CEDL,DML,PFL},
  url_arxiv = {https://arxiv.org/abs/2107.13892},
  type      = {4},
}

Abstract: Traditionally, federated learning (FL) aims to train a single global model while collaboratively using multiple clients and a server. Two natural challenges that FL algorithms face are heterogeneity in data across clients and collaboration of clients with diverse resources. In this work, we introduce a quantized and personalized FL algorithm QuPeD that facilitates collective (personalized model compression) training via knowledge distillation (KD) among clients who have access to heterogeneous data and resources. For personalization, we allow clients to learn compressed personalized models with different quantization parameters and model dimensions/structures. Towards this, first we propose an algorithm for learning quantized models through a relaxed optimization problem, where quantization values are also optimized over. When each client participating in the (federated) learning process has different requirements for the compressed model (both in model dimension and precision), we formulate a compressed personalization framework by introducing knowledge distillation loss for local client objectives collaborating through a global model. We develop an alternating proximal gradient update for solving this compressed personalization problem, and analyze its convergence properties. Numerically, we validate that QuPeD outperforms competing personalized FL methods, FedAvg, and local training of clients in various heterogeneous settings.
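
One way to picture the distillation-based local objective the abstract refers to: train the compressed personalized model on its local labels while penalizing divergence from the global model's (teacher) predictions. The sketch below is an assumed, generic distillation loss; the temperature, the weighting lam, and the T-squared scaling are conventional choices, not taken from the paper.

# Hedged sketch of a distillation-style local objective (illustrative only).
import numpy as np

def softmax(z, T=1.0):
    z = z / T
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

def local_distillation_loss(personal_logits, global_logits, labels, lam=0.5, T=2.0):
    # Cross-entropy of the personalized (student) model on local labels.
    p = softmax(personal_logits)
    ce = -np.mean(np.log(p[np.arange(len(labels)), labels] + 1e-12))
    # KL(teacher || student) at temperature T, with the global model as teacher.
    pt = softmax(global_logits, T)
    ps = softmax(personal_logits, T)
    kd = np.mean(np.sum(pt * (np.log(pt + 1e-12) - np.log(ps + 1e-12)), axis=-1))
    # Conventional T^2 scaling keeps the two terms comparable as T varies.
    return ce + lam * (T ** 2) * kd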

2020

Journal articles (2)

D. Basu; D. Data; C. Karakus; and S. N. Diggavi. Qsparse-Local-SGD: Distributed SGD With Quantization, Sparsification, and Local Computations. IEEE Journal on Selected Areas in Information Theory, 1(1): 217–226, May 2020.

@article{9057579,
  author    = {D. {Basu} and D. {Data} and C. {Karakus} and S. N. {Diggavi}},
  journal   = {IEEE Journal on Selected Areas in Information Theory},
  title     = {Qsparse-Local-SGD: Distributed SGD With Quantization, Sparsification, and Local Computations},
  year      = {2020},
  volume    = {1},
  number    = {1},
  pages     = {217--226},
  month     = {May},
  doi       = {10.1109/JSAIT.2020.2985917},
  issn      = {2641-8770},
  keywords  = {concave programming;convex programming;gradient methods;learning (artificial intelligence);optimisation;stochastic processes;stochastic gradient descent;convex objective functions;nonconvex objective functions;error compensation;sparsification;Qsparse-local-SGD;distributed setting;compressed gradients;true gradients;gradient compression;large-scale learning models;distributed optimization;quantization;vanilla distributed SGD;Quantization (signal);Convergence;Computational modeling;Stochastic processes;Training;Peer-to-peer computing;Optimization;Distributed optimization and learning;stochastic optimization;communication efficient training methods},
  tags      = {journal,CEDL,DML},
  type      = {2},
  url_arxiv = {https://arxiv.org/abs/1906.02367},
}

Abstract: Communication bottleneck has been identified as a significant issue in distributed optimization of large-scale learning models. Recently, several approaches to mitigate this problem have been proposed, including different forms of gradient compression or computing local models and mixing them iteratively. In this paper, we propose Qsparse-local-SGD algorithm, which combines aggressive sparsification with quantization and local computation along with error compensation, by keeping track of the difference between the true and compressed gradients. We propose both synchronous and asynchronous implementations of Qsparse-local-SGD. We analyze convergence for Qsparse-local-SGD in the distributed setting for smooth non-convex and convex objective functions. We demonstrate that Qsparse-local-SGD converges at the same rate as vanilla distributed SGD for many important classes of sparsifiers and quantizers. We use Qsparse-local-SGD to train ResNet-50 on ImageNet and show that it results in significant savings over the state-of-the-art, in the number of bits transmitted to reach target accuracy.
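
The error-compensation pattern described here (compress what you intended to send, remember what the compressor discarded, and add it back next round) is sketched below in NumPy. The compressor and all names are simplified stand-ins, not the authors' implementation.

# Minimal illustrative sketch of error-compensated, compressed local SGD.
import numpy as np

def compress(v, k, levels=16):
    # Top-k sparsification followed by coarse scalar quantization of the survivors.
    out = np.zeros_like(v)
    idx = np.argpartition(np.abs(v), -k)[-k:]
    scale = np.max(np.abs(v[idx])) + 1e-12
    out[idx] = np.round(v[idx] / scale * levels) / levels * scale
    return out

def worker_round(w, memory, grad_fn, lr=0.1, local_steps=4, k=10):
    # Run a fixed number of local SGD steps, then send a compressed,
    # error-compensated version of the accumulated change.
    w_start = w.copy()
    for _ in range(local_steps):
        w = w - lr * grad_fn(w)
    update = memory + (w - w_start)   # add back what was dropped earlier
    msg = compress(update, k)
    memory = update - msg             # remember what compression discarded now
    return msg, memory

The per-worker memory means information dropped by the compressor is delayed rather than lost, which is the usual intuition behind error compensation.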

O. A. Hanna; Y. H. Ezzeldin; T. Sadjadpour; C. Fragouli; and S. Diggavi. On Distributed Quantization for Classification. IEEE Journal on Selected Areas in Information Theory, 1(1): 237–249, May 2020.

@article{9061031,
  author    = {O. A. {Hanna} and Y. H. {Ezzeldin} and T. {Sadjadpour} and C. {Fragouli} and S. {Diggavi}},
  journal   = {IEEE Journal on Selected Areas in Information Theory},
  title     = {On Distributed Quantization for Classification},
  year      = {2020},
  volume    = {1},
  number    = {1},
  pages     = {237--249},
  month     = {May},
  doi       = {10.1109/JSAIT.2020.2986467},
  issn      = {2641-8770},
  keywords  = {computational complexity;optimisation;quantisation (signal);signal classification;signal reconstruction;signal representation;distributed feature quantization;pretrained classifier;central node;distributed nodes;communication constrained channels;classification task;quantizer design;quantization algorithms;neural representations;training data;distributed quantization;NP-hardness;signal reconstruction;Quantization (signal);Image reconstruction;Training;Testing;Information theory;Task analysis;Training data;Distributed quantization;inference;communication-bounded inference;quantization with deep learning},
  tags      = {journal,DML,CEDL},
  type      = {2},
  url_arxiv = {https://arxiv.org/abs/1911.00216},
}

Abstract: We consider the problem of distributed feature quantization, where the goal is to enable a pretrained classifier at a central node to carry out its classification on features that are gathered from distributed nodes through communication constrained channels. We propose the design of distributed quantization schemes specifically tailored to the classification task: unlike quantization schemes that help the central node reconstruct the original signal as accurately as possible, our focus is not reconstruction accuracy, but instead correct classification. Our work does not make any a priori distributional assumptions on the data, but instead uses training data for the quantizer design. Our main contributions include: we prove NP-hardness of finding optimal quantizers in the general case; we design an optimal scheme for a special case; we propose quantization algorithms, that leverage discrete neural representations and training data, and can be designed in polynomial-time for any number of features, any number of classes, and arbitrary division of features across the distributed nodes. We find that tailoring the quantizers to the classification task can offer significant savings: as compared to alternatives, we can achieve more than a factor of two reduction in terms of the number of bits communicated, for the same classification accuracy.

Conference papers (1)

Navjot Singh; Deepesh Data; Jemin George; and Suhas Diggavi. SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Stochastic Optimization. In 2020 59th IEEE Conference on Decision and Control (CDC), pages 3449–3456, 2020.

@article{singh2019sparq,
  author    = {Singh, Navjot and Data, Deepesh and George, Jemin and Diggavi, Suhas},
  journal   = {2020 59th IEEE Conference on Decision and Control (CDC)},
  title     = {SPARQ-SGD: Event-Triggered and Compressed Communication in Decentralized Stochastic Optimization},
  year      = {2020},
  pages     = {3449--3456},
  doi       = {10.1109/CDC42340.2020.9303828},
  issn      = {2576-2370},
  tags      = {conf,CEDL,DML},
  type      = {4},
  url_arxiv = {https://arxiv.org/abs/1910.14280},
}

Abstract: In this paper, we propose and analyze SPARQ-SGD, which is an event-triggered and compressed algorithm for decentralized training of large-scale machine learning models. Each node can locally compute a condition (event) which triggers a communication where quantized and sparsified local model parameters are sent. In SPARQ-SGD each node takes at least a fixed number (H) of local gradient steps and then checks if the model parameters have significantly changed compared to its last update; it communicates further compressed model parameters only when there is a significant change, as specified by a (design) criterion. We prove that the SPARQ-SGD converges as O(1/(nT)) and O(1/√(nT)) in the strongly-convex and non-convex settings, respectively, demonstrating that such aggressive compression, including event-triggered communication, model sparsification and quantization does not affect the overall convergence rate as compared to uncompressed decentralized training; thereby theoretically yielding communication efficiency for "free". We evaluate SPARQ-SGD over real datasets to demonstrate significant amount of savings in communication over the state-of-the-art.

2019

Conference papers (2)

Debraj Basu; Deepesh Data; Can Karakus; and Suhas Diggavi. Qsparse-local-SGD: Distributed SGD with quantization, sparsification and local computations. In Advances in Neural Information Processing Systems, pages 14695–14706, 2019.

@inproceedings{basu2019qsparse,
  author    = {Basu, Debraj and Data, Deepesh and Karakus, Can and Diggavi, Suhas},
  booktitle = {Advances in Neural Information Processing Systems},
  title     = {Qsparse-local-SGD: Distributed SGD with quantization, sparsification and local computations},
  pages     = {14695--14706},
  year      = {2019},
  tags      = {conf,CEDL,DML},
  type      = {4},
  url_arxiv = {https://arxiv.org/abs/1906.02367},
}

Abstract: Communication bottleneck has been identified as a significant issue in distributed optimization of large-scale learning models. Recently, several approaches to mitigate this problem have been proposed, including different forms of gradient compression or computing local models and mixing them iteratively. In this paper, we propose Qsparse-local-SGD algorithm, which combines aggressive sparsification with quantization and local computation along with error compensation, by keeping track of the difference between the true and compressed gradients. We propose both synchronous and asynchronous implementations of Qsparse-local-SGD. We analyze convergence for Qsparse-local-SGD in the distributed setting for smooth non-convex and convex objective functions. We demonstrate that Qsparse-local-SGD converges at the same rate as vanilla distributed SGD for many important classes of sparsifiers and quantizers. We use Qsparse-local-SGD to train ResNet-50 on ImageNet and show that it results in significant savings over the state-of-the-art, in the number of bits transmitted to reach target accuracy.

Yahya H. Ezzeldin; Christina Fragouli; and Suhas Diggavi. Quantizing Signals for Linear Classification. In 2019 IEEE International Symposium on Information Theory (ISIT), pages 912–916, 2019. IEEE.

@inproceedings{ezzeldin2019quantizing,
  author       = {Ezzeldin, Yahya H and Fragouli, Christina and Diggavi, Suhas},
  booktitle    = {2019 IEEE International Symposium on Information Theory (ISIT)},
  organization = {IEEE},
  title        = {Quantizing Signals for Linear Classification},
  pages        = {912--916},
  year         = {2019},
  doi          = {10.1109/ISIT.2019.8849589},
  tags         = {conf,DML,CEDL},
  type         = {4},
}

Abstract: In many machine learning applications, once we have learned a classifier, in order to apply it, we may still need to gather features from distributed sensors over communication constrained channels. In this paper, we propose a polynomial complexity algorithm for feature quantization tailored to minimizing the classification error of a linear classifier. Our scheme produces scalar quantizers that are well-tailored to delay-sensitive applications, operates on the same training data used to learn the classifier, and allows each distributed sensor to operate independently of each other. Numerical evaluation indicates up to 65% benefits over alternative approaches. Additionally, we provide an example where, jointly designing the linear classifier and the quantization scheme, can outperform sequential designs.